knitr::opts_chunk$set(warning = FALSE, message = FALSE, fig.path = "figures/", fig.height=6.51,fig.width=8.49, dev='png', fig.process = function(filename) {
new_filename <- stringr::str_remove(string = filename,
pattern = "-1")
fs::file_move(path = filename, new_path = new_filename)
ifelse(fs::file_exists(new_filename), new_filename, filename)
})
set.seed(29106799)
libs <- c("ggplot2", "pracma", "glmnet", "extRemes", "foreach", "doParallel", "gridExtra", "pROC", "MLmetrics", "energy")
sapply(libs,require,character.only = T, quietly = TRUE, warn.conflicts = FALSE)
## ggplot2 pracma glmnet extRemes foreach doParallel
## TRUE TRUE TRUE TRUE TRUE TRUE
## gridExtra pROC MLmetrics energy
## TRUE TRUE TRUE TRUE
require(knitr,quietly=TRUE,warn.conflicts = FALSE)
knit("PredictEpidemicTools.Rmd",quiet = TRUE)
## [1] "PredictEpidemicTools.md"
detectCores()
## [1] 8
cl <- makeCluster(7) #set to number of cores -1
registerDoParallel(cl)
getDoParWorkers()
## [1] 7
incidences <- read.csv("ILIincidences1985-2019.csv", sep = ";")
incidences <- incidences[!is.na(incidences$t_inc),]
incidences$t_inc <-
as.numeric(levels(incidences$t_inc))[incidences$t_inc]
incidences$yearweek <- as.numeric(incidences$yearweek)
dates <-
seq(as.Date("1985-01-01") , as.Date("2019-03-10"), by = "weeks")
incidences$dates <- dates
incidences$weeknum <-
seq(1, length = length(incidences$year), by = 1)
incidences <-
incidences[-228, ] # Missing data on the original file.
debut_sentinelles <- c()
fin_sentinelles <- c()
for (i in unique(incidences$season)) {
temp <-
incidences[(incidences$epid == 1) &
(incidences$season == i),]$weeknum
debut_sentinelles <- c(debut_sentinelles, min(temp))
fin_sentinelles <- c(fin_sentinelles, max(temp))
}
l.epid_serf <- fin_sentinelles - debut_sentinelles + 1
longest <- max(l.epid_serf)
shortest <- min(l.epid_serf)
year_longest <- unique(incidences$season)[which.max(l.epid_serf)]
year_shortest <- unique(incidences$season)[which.min(l.epid_serf)]
In the Serfling definition of an epidemic, the shortest epidemic between 1985 and 2019 was 5 weeks in 1991 and the longest 16 in 2010 .
d1 <- 1
d2 <- longest
sent_epid_data <-
EpidemicDataFrameSentinelles(data = incidences, d1 = d1, d2 = d2)
peaks <- c()
for (i in unique(sent_epid_data$season)) {
peaks <- c(peaks, max(sent_epid_data[sent_epid_data$season == i, ]$t_inc, na.rm = T))
}
highest_peaks <- max(peaks)
lowest_peaks <- min(peaks)
year_highest <- unique(incidences$season)[which.max(peaks)]
year_lowest <- unique(incidences$season)[which.min(peaks)]
In the Serfling definition of an epidemic, the value of the highest epidemic peak between 1985 and 2019 was 1793 in 1989 and the value of the lowest 325 in 325 .
x.plot <-
ggplot(data = incidences, aes(x = dates, y = t_inc)) + geom_line(size = 0.3)
x.plot <-
x.plot + xlab("Years") + ylab("Incidence rates")
x.plot <-
x.plot + scale_x_date(date_breaks = "3 years", date_labels = "%Y")
x.plot <- x.plot + geom_segment(aes(xend = as.Date("1989-01-01"), yend = 1850, x = as.Date("1989-01-01"), y = 1940),
arrow = arrow(length = unit(0.1,"cm")), col ='blue')
x.plot <- x.plot + geom_segment(aes(xend = as.Date("2014-02-01"), yend = 375, x = as.Date("2014-02-01"), y = 480),
arrow = arrow(length = unit(0.1,"cm")), col ='blue')
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 14),
axis.text.x = element_text(size = 10),
axis.text.y = element_text(size = 10),
plot.title = element_text(size = 14)
)
x.plot
Put the 2019 epidemic aside save it for a test case
incidences2018 <- subset(x = incidences, subset = (season < 2019))
nb.epid <-
length(unique(incidences2018$season)) ## number of epidemics between 1985 and 2019
There are 34 epidemics between 1985 and 2018.
level_epid <- 0.88
thres_epid <-
quantile(incidences2018$t_inc, probs = level_epid, na.rm = T)
The threshold for epidemic definition is chosen as the 0.88 and is equal to 272.44.
d <- longest
epid_data_thres<-
EpidemicDataFrame(data = incidences2018, d = d, thres = thres_epid)
l.epid <- c()
for (i in unique(epid_data_thres$season)) {
l.epid <-
c(l.epid, length(epid_data_thres[(epid_data_thres$season == i) &
(!is.na(epid_data_thres$t_inc)),]$t_inc))
}
nb.epid <- length(l.epid)
longest_thres <- max(l.epid)
shortest_thres <- min(l.epid)
year_longest_thres <- unique(incidences$season)[which.max(l.epid)]
year_shortest_thres <- unique(incidences$season)[which.min(l.epid)]
In our definition of an epidemic, there are 34 epidemics, and the shortest epidemic between 1985 and 2019 was 3 weeks in 2014 and the longest 12 in 1985.
d <- longest_thres
epid_data_thres <-
EpidemicDataFrame(data = incidences2018, d = d, thres = thres_epid)
peaks_thres <- c()
for (i in unique(epid_data_thres$season)) {
peaks_thres <- c(peaks_thres, max(epid_data_thres[epid_data_thres$season == i, ]$t_inc, na.rm = T))
}
highest_peaks_thres <- max(peaks_thres)
lowest_peaks_thres <- min(peaks_thres)
year_highest_thres <- unique(incidences$season)[which.max(peaks_thres)]
year_lowest_thres <- unique(incidences$season)[which.min(peaks_thres)]
In the new defintion of an epidemic, the value of the highest epidemic peak between 1985 and 2019 was 1793 in 1989 and the value of the lowest 325 in 325 .
d1 <- 1
d2 <- longest
sent_epid_data <-
EpidemicDataFrameSentinelles(data = incidences2018, d1 = d1, d2 = d2)
df.x <- sent_epid_data
x.plot <- ggplot(data = df.x, aes(x = 1:d2, y = t_inc))
for (i in 1:nb.epid) {
x.plot <-
x.plot + geom_line(data = df.x[df.x$season == 1984 + i,], aes(x = weeks, y = t_inc), size =
0.3)
}
x.plot <- x.plot + scale_x_continuous(breaks = seq(1, 16, 1))
x.plot <-
x.plot + xlab("Epidemic weeks") + ylab("Incidence rates")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot
df.x <- subset(epid_data_thres, weeks < d + 1)
x.plot <- ggplot(data = df.x, aes(x = 1:d, y = t_inc))
for (i in 1:nb.epid) {
x.plot <-
x.plot + geom_line(data = df.x[df.x$season == 1984 + i,], aes(x = weeks, y = t_inc), size =
0.3)
}
x.plot <- x.plot + scale_x_continuous(breaks = seq(1, 16, 1), labels = seq(1, 16, 1)) + expand_limits(x=c(0,16))
x.plot <-
x.plot + xlab("Epidemic weeks") + ylab("Incidence rates")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot
d <- 3
epid_data <-
EpidemicDataFrame(data = incidences2018, d = d, thres = thres_epid)
acf.temp <- acf(x = epid_data[epid_data$weeks == 1,]$t_inc,lag.max = 10, plot = F)
acf.week1 <- data.frame(lag = acf.temp$lag, acf = acf.temp$acf)
ciline <- qnorm((1 - 0.95)/2)/sqrt(length(epid_data[epid_data$weeks == 1,]$t_inc))
x.plot <- ggplot(data = acf.week1, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = -ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Autocorrelation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
adcf.temp <- adcf(x = epid_data[epid_data$weeks == 1,]$t_inc,lag.range=1:10,nreps=200)
adcf.week1 <- data.frame(lag = adcf.temp$lag, adcf = adcf.temp$adcf)
ciline_u <- adcf.temp$u[1]
ciline_l <- adcf.temp$l[1]
x.plot <- ggplot(data = adcf.week1, aes(x = lag , y = adcf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline_u, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = ciline_l, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Auto Distance Correlation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
acf.temp <- acf(x = epid_data[epid_data$weeks == 2,]$t_inc,lag.max = 10, plot = F)
acf.week2 <- data.frame(lag = acf.temp$lag, acf = acf.temp$acf)
ciline <- qnorm((1 - 0.95)/2)/sqrt(length(epid_data[epid_data$weeks == 2,]$t_inc))
x.plot <- ggplot(data = acf.week2, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = -ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Autocorrelation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
adcf.temp <- adcf(x = epid_data[epid_data$weeks == 2,]$t_inc,lag.range=1:10,nreps=200)
adcf.week2 <- data.frame(lag = adcf.temp$lag, adcf = adcf.temp$adcf)
ciline_u <- adcf.temp$u[1]
ciline_l <- adcf.temp$l[1]
x.plot <- ggplot(data = adcf.week2, aes(x = lag , y = adcf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline_u, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = ciline_l, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Auto Distance Correlation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
acf.temp <- acf(x = epid_data[epid_data$weeks == 3,]$t_inc,lag.max = 10, plot = F)
acf.week3 <- data.frame(lag = acf.temp$lag, acf = acf.temp$acf)
ciline <- qnorm((1 - 0.95)/2)/sqrt(length(epid_data[epid_data$weeks == 3,]$t_inc))
x.plot <- ggplot(data = acf.week3, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = -ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Autocorrelation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
adcf.temp <- adcf(x = epid_data[epid_data$weeks == 3,]$t_inc,lag.range=1:10,nreps=200)
adcf.week3 <- data.frame(lag = adcf.temp$lag, adcf = adcf.temp$adcf)
ciline_u <- adcf.temp$u[1]
ciline_l <- adcf.temp$l[1]
x.plot <- ggplot(data = adcf.week3, aes(x = lag , y = adcf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline_u, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = ciline_l, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Auto Distance Correlation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
ccf.temp <- acf(x = epid_data[epid_data$weeks == 1,]$t_inc,y = epid_data[epid_data$weeks == 2,]$t_inc,lag.max = 10, plot = F)
cdcf.temp <- c(x = epid_data[epid_data$weeks == 1,]$t_inc,y = epid_data[epid_data$weeks == 2,]$t_inc,lag.range=1:10,nreps=200)
ccf.week1.2 <- data.frame(lag = ccf.temp$lag, acf = ccf.temp$acf)
ciline <- qnorm((1 - 0.95)/2)/sqrt(length(epid_data[epid_data$weeks == 3,]$t_inc))
x.plot <- ggplot(data = ccf.week1.2, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = -ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Crosscorrelation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
cdcf.temp <- cdcf(x = epid_data[epid_data$weeks == 1,]$t_inc,y = epid_data[epid_data$weeks == 2,]$t_inc,lag.range=1:10,nreps=200)
cdcf.week1.2 <- data.frame(lag = cdcf.temp$lag, acf = cdcf.temp$cdcf)
ciline_u <- cdcf.temp$u[1]
ciline_l <- cdcf.temp$l[1]
x.plot <- ggplot(data = cdcf.week1.2, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline_u, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = ciline_l, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Cross distance correlation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
ccf.temp <- acf(x = epid_data[epid_data$weeks == 1,]$t_inc,y = epid_data[epid_data$weeks == 3,]$t_inc,lag.max = 10, plot = F)
ccf.week1.3 <- data.frame(lag = ccf.temp$lag, acf = ccf.temp$acf)
ciline <- qnorm((1 - 0.95)/2)/sqrt(length(epid_data[epid_data$weeks == 3,]$t_inc))
x.plot <- ggplot(data = ccf.week1.3, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = -ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Crosscorrelation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
cdcf.temp <- cdcf(x = epid_data[epid_data$weeks == 1,]$t_inc,y = epid_data[epid_data$weeks == 3,]$t_inc,lag.range=1:10,nreps=200)
cdcf.week1.3 <- data.frame(lag = cdcf.temp$lag, acf = cdcf.temp$cdcf)
ciline_u <- cdcf.temp$u[1]
ciline_l <- cdcf.temp$l[1]
x.plot <- ggplot(data = cdcf.week1.3, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline_u, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = ciline_l, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Cross distance correlation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
ccf.temp <- acf(x = epid_data[epid_data$weeks == 2,]$t_inc,y = epid_data[epid_data$weeks == 3,]$t_inc,lag.max = 10, plot = F)
ccf.week2.3 <- data.frame(lag = ccf.temp$lag, acf = ccf.temp$acf)
ciline <- qnorm((1 - 0.95)/2)/sqrt(length(epid_data[epid_data$weeks == 3,]$t_inc))
x.plot <- ggplot(data = ccf.week2.3, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = -ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Crosscorrelation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
cdcf.temp <- cdcf(x = epid_data[epid_data$weeks == 2,]$t_inc,y = epid_data[epid_data$weeks == 3,]$t_inc,lag.range=1:10,nreps=200)
cdcf.week2.3 <- data.frame(lag = cdcf.temp$lag, acf = cdcf.temp$cdcf)
ciline_u <- cdcf.temp$u[1]
ciline_l <- cdcf.temp$l[1]
x.plot <- ggplot(data = cdcf.week2.3, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline_u, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = ciline_l, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Cross distance correlation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
level_gpd <- c(0.9, 0.9, 0.9)
thres_gpd <- rep(NA, 3)
for (i in 1:3) {
thres_gpd[i] <-
quantile(incidences2018$t_inc, probs = level_gpd[i], na.rm = T)
}
The GPD threshold for Weeks 1, 2 and 3 is defined as the 0.9, 0.9, 0.9-quantile of the data, that is 339.2, 339.2, 339.2.
`%nin%` = Negate(`%in%`)
epid_data$excess <- rep(NA, length(epid_data$season))
for (j in unique(epid_data$season)) {
temp <- epid_data[epid_data$season == j,]
if (sum(temp$t_inc > thres_gpd, na.rm = T) == 0) {
epid_data[epid_data$season == j,]$excess <- rep(NA, d)
}
else {
epid_data[epid_data$season == j,]$excess <-
temp$t_inc - thres_gpd
}
}
excess1 <- epid_data[(epid_data$weeks == 1),]$excess
acf.temp <- acf(x = excess1[!is.na(excess1)],lag.max = 10, plot = F)
acf.excess1 <- data.frame(lag = acf.temp$lag, acf = acf.temp$acf)
ciline <- qnorm((1 - 0.95)/2)/sqrt(length(excess1[!is.na(excess1)]))
x.plot <- ggplot(data = acf.excess1, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = -ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Autocorrelation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
adcf.temp <- adcf(x = excess1[!is.na(excess1)],lag.range=1:10,nreps=200)
adcf.excess1 <- data.frame(lag = adcf.temp$lag, adcf = adcf.temp$adcf)
ciline_u <- adcf.temp$u[1]
ciline_l <- adcf.temp$l[1]
x.plot <- ggplot(data = adcf.excess1, aes(x = lag , y = adcf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline_u, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = ciline_l, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Auto Distance Correlation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
excess2 <- epid_data[(epid_data$weeks == 2),]$excess
acf.temp <- acf(x = excess2[!is.na(excess2)],lag.max = 10, plot = F)
acf.excess2 <- data.frame(lag = acf.temp$lag, acf = acf.temp$acf)
ciline <- qnorm((1 - 0.95)/2)/sqrt(length(excess2[!is.na(excess2)]))
x.plot <- ggplot(data = acf.excess2, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = -ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Autocorrelation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
adcf.temp <- adcf(x = excess2[!is.na(excess2)],lag.range=1:10,nreps=200)
adcf.excess2 <- data.frame(lag = adcf.temp$lag, adcf = adcf.temp$adcf)
ciline_u <- adcf.temp$u[1]
ciline_l <- adcf.temp$l[1]
x.plot <- ggplot(data = adcf.excess2, aes(x = lag , y = adcf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline_u, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = ciline_l, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Auto Distance Correlation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
excess3 <- epid_data[(epid_data$weeks == 3),]$excess
acf.temp <- acf(x = excess3[!is.na(excess3)],lag.max = 10, plot = F)
acf.excess3 <- data.frame(lag = acf.temp$lag, acf = acf.temp$acf)
ciline <- qnorm((1 - 0.95)/2)/sqrt(length(excess3[!is.na(excess3)]))
x.plot <- ggplot(data = acf.excess3, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = -ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Autocorrelation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
adcf.temp <- adcf(x = excess3[!is.na(excess3)],lag.range=1:10,nreps=200)
adcf.excess3 <- data.frame(lag = adcf.temp$lag, adcf = adcf.temp$adcf)
ciline_u <- adcf.temp$u[1]
ciline_l <- adcf.temp$l[1]
x.plot <- ggplot(data = adcf.excess3, aes(x = lag , y = adcf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline_u, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = ciline_l, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Auto Distance Correlation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
ccf.temp <- acf(x = excess1[!is.na(excess1)],y = excess2[!is.na(excess2)],lag.max = 10, plot = F)
ccf.excess1.2 <- data.frame(lag = ccf.temp$lag, acf = ccf.temp$acf)
ciline <- qnorm((1 - 0.95)/2)/sqrt(length(excess1))
x.plot <- ggplot(data = ccf.excess1.2, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = -ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Crosscorrelation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
cdcf.temp <- cdcf(x = excess1[!is.na(excess1)],y = excess2[!is.na(excess2)],lag.range=1:10,nreps=200)
cdcf.excess1.2 <- data.frame(lag = cdcf.temp$lag, acf = cdcf.temp$cdcf)
ciline_u <- cdcf.temp$u[1]
ciline_l <- cdcf.temp$l[1]
x.plot <- ggplot(data = cdcf.excess1.2, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline_u, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = ciline_l, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Cross distance correlation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
ccf.temp <- acf(x = excess1[!is.na(excess1)],y = excess3[!is.na(excess3)],lag.max = 10, plot = F)
ccf.excess1.3 <- data.frame(lag = ccf.temp$lag, acf = ccf.temp$acf)
ciline <- qnorm((1 - 0.95)/2)/sqrt(length(excess1))
x.plot <- ggplot(data = ccf.excess1.3, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = -ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Crosscorrelation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
cdcf.temp <- cdcf(x = excess1[!is.na(excess1)],y = excess3[!is.na(excess3)],lag.range=1:10,nreps=200)
cdcf.excess1.3 <- data.frame(lag = cdcf.temp$lag, acf = cdcf.temp$cdcf)
ciline_u <- cdcf.temp$u[1]
ciline_l <- cdcf.temp$l[1]
x.plot <- ggplot(data = cdcf.excess1.3, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline_u, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = ciline_l, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Cross distance correlation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
ccf.temp <- acf(x = excess2[!is.na(excess2)],y = excess3[!is.na(excess3)],lag.max = 10, plot = F)
ccf.excess2.3 <- data.frame(lag = ccf.temp$lag, acf = ccf.temp$acf)
ciline <- qnorm((1 - 0.95)/2)/sqrt(length(excess1))
x.plot <- ggplot(data = ccf.excess2.3, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = -ciline, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Crosscorrelation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
cdcf.temp <- cdcf(x = excess2[!is.na(excess2)],y = excess3[!is.na(excess3)],lag.range=1:10,nreps=200)
cdcf.excess2.3 <- data.frame(lag = cdcf.temp$lag, acf = cdcf.temp$cdcf)
ciline_u <- cdcf.temp$u[1]
ciline_l <- cdcf.temp$l[1]
x.plot <- ggplot(data = cdcf.excess2.3, aes(x = lag , y = acf)) + geom_bar(stat = 'identity', width = 0.03)
x.plot <- x.plot + geom_hline(yintercept = ciline_u, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = ciline_l, color = "blue",linetype =2)
x.plot <- x.plot + geom_hline(yintercept = 0, size = 0.2)
x.plot <-
x.plot + xlab("Lag") + ylab("Cross distance correlation")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + scale_x_continuous(breaks = seq(0, 10, 2)) +scale_y_continuous(breaks = seq(-1, 1, .2))
x.plot
univ.fit_exp <- list()
sigma_univ <- rep(0, d)
df.param0 <- c(NA, NA, NA)
for (i in 1:(d)) {
week <-
epid_data[(epid_data$weeks == i) &
!is.na(epid_data$t_inc),]$t_inc
univ.fit_exp[[i]] <-
fevd(
x = week,
threshold = thres_gpd[i],
type = "Exponential",
method = "MLE"
)
sigma_univ[i] <- univ.fit_exp[[i]]$results$par[1]
ci.univ.fit <-
ci.fevd(univ.fit_exp[[i]], 0.05, type = 'parameter')
df.param0 <- rbind(df.param0, ci.univ.fit)
}
df.param0 <- df.param0[-1, ]
rownames(df.param0) <- c("Week1", "Week2", "Week3")
kable(df.param0)
| 95% lower CI | scale | 95% upper CI | |
|---|---|---|---|
| Week1 | 40.4452 | 72.0000 | 103.5548 |
| Week2 | 166.1306 | 256.3806 | 346.6307 |
| Week3 | 251.5334 | 391.7000 | 531.8666 |
univ.fit_gpd <- list()
for (i in 1:d) {
week <-
epid_data[(epid_data$weeks == i) &
!is.na(epid_data$t_inc), ]$t_inc
univ.fit_gpd[[i]]<- fevd(
x = week,
threshold = thres_gpd[i],
type = "GP",
method = "MLE"
)
}
pvalue <- rep(0, 3)
for (i in 1:d) {
lr <- lr.test(univ.fit_gpd[[i]],univ.fit_exp[[i]])
pvalue[i] <- lr$p.value
}
The p-values of the LR tests for \(\gamma = 0\) for Weeks 1, 2 and 3 are equal to 0.6568338, 0.5730354, 0.6410537 respectively.
week1 <-
epid_data[(epid_data$weeks == 1) &
!is.na(epid_data$t_inc),]$t_inc
obs1 <- week1[week1 > thres_gpd[1]] - thres_gpd[1]
theo1 <-
qevd(
p = ppoints(length(obs1)),
scale = sigma_univ[1],
threshold = 0,
type = "Exponential"
)
df_qq1 <- data.frame(Obs = sort(obs1), Theo = theo1)
qq.plot1 <-
ggplot(data = df_qq1, aes(x = Obs, y = Theo)) + geom_point(shape = 1, size = 2.5)
qq.plot1 <-
qq.plot1 + geom_abline(
slope = 1,
intercept = 0,
col = "blue",
linetype = 2,
size = 0.5
)
qq.plot1 <- qq.plot1 + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
qq.plot1 <- qq.plot1 + xlim(0, 300) + ylim(0 , 300) + coord_fixed()
qq.plot1 <- qq.plot1 + xlab("Observed") + ylab("Theorical")
qq.plot1
week2 <-
epid_data[(epid_data$weeks == 2) &
!is.na(epid_data$t_inc),]$t_inc
obs2 <- week2[week2 > thres_gpd[2]] - thres_gpd[2]
theo2 <-
qevd(
p = ppoints(length(obs2)),
scale = sigma_univ[2],
threshold = 0,
type = "Exponential"
)
df_qq2 <- data.frame(Obs = sort(obs2), Theo = theo2)
qq.plot2 <-
ggplot(data = df_qq2, aes(x = Obs, y = Theo)) + geom_point(shape = 1, size = 2.5)
qq.plot2 <-
qq.plot2 + geom_abline(
slope = 1,
intercept = 0,
col = "blue",
linetype = 2,
size = 0.5
)
qq.plot2 <- qq.plot2 + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
qq.plot2 <- qq.plot2 + xlim(0, 1100) + ylim(0 , 1100) + coord_fixed()
qq.plot2 <- qq.plot2 + xlab("Observed") + ylab("Theorical")
qq.plot2
week3 <-
epid_data[(epid_data$weeks == 3) &
!is.na(epid_data$t_inc),]$t_inc
obs3 <- week3[week3 > thres_gpd[3]] - thres_gpd[3]
theo3 <-
qevd(
p = ppoints(length(obs3)),
scale = sigma_univ[3],
threshold = 0,
type = "Exponential"
)
df_qq3 <- data.frame(Obs = sort(obs3), Theo = theo3)
qq.plot3 <-
ggplot(data = df_qq3, aes(x = Obs, y = Theo)) + geom_point(shape = 1, size = 2.5)
qq.plot3 <-
qq.plot3 + geom_abline(
slope = 1,
intercept = 0,
col = "blue",
linetype = 2,
size = 0.5
)
qq.plot3 <- qq.plot3 + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
qq.plot3 <-
qq.plot3 + xlim(0, 1650) + ylim(0 , 1650) + coord_fixed()
qq.plot3 <- qq.plot3 + xlab("Observed") + ylab("Theorical")
qq.plot3
nb.excess.Week3 <- length(obs3)
p.u <- nb.excess.Week3 / nb.epid
#One year - 10% #
n <- 1
alpha <- 0.1
R1.10 <-
thres_gpd[3] + sigma_univ[3] * (log(p.u) - log(1 - (1 - alpha) ^ (1 / n)))
#One year - 1% #
n <- 1
alpha <- 0.01
R1.1 <-
thres_gpd[3] + sigma_univ[3] * (log(p.u) - log(1 - (1 - alpha) ^ (1 / n)))
#10 years - 10% #
n <- 10
alpha <- 0.1
R10.10 <-
thres_gpd[3] + sigma_univ[3] * (log(p.u) - log(1 - (1 - alpha) ^ (1 / n)))
#10 years - 1% #
n <- 10
alpha <- 0.01
R10.1 <-
thres_gpd[3] + sigma_univ[3] * (log(p.u) - log(1 - (1 - alpha) ^ (1 / n)))
risks <- data.frame(R1.10, R1.1, R10.10, R10.1)
rownames(risks) <- c("risks")
colnames(risks) <- c("1 year - 10%", "1 year - 1%", "10 years - 10%", "10 years - 1%")
kable(risks)
| 1 year - 10% | 1 year - 1% | 10 years - 10% | 10 years - 1% | |
|---|---|---|---|---|
| risks | 1192.096 | 2094.019 | 2075.627 | 2994.171 |
The estimate of \(\widehat p_u\) is equal to 0.8823529.
epid_data$excess_stand <- rep(NA, length(epid_data$season))
for (i in 1:d) {
epid_data[epid_data$weeks == i,]$excess_stand <-
epid_data[epid_data$weeks == i,]$excess / sigma_univ[i]
}
season_NA <-
unique(subset(x = epid_data, is.na(epid_data$excess))$season)
epid_data_ssNA <- subset(x = epid_data, season %nin% season_NA)
excess_stand_matrix <-
matrix(epid_data_ssNA$excess_stand,
ncol = d,
byrow = T)
nb.epid_extreme <- sum(!is.na(epid_data$excess))/3
The number of extreme epidemics is 32.
For each model, we do the optimisation 50 times and then choose the parameters of the iteration which has the smaller -log(LLK). Since it takes a lot of time, we save the results in the file.
# nb.run <- 50
# fit.Gumbel_fun <- function(i) {
# library('pracma')
# alpha0 <- runif(d, 1.2, 50)
# b0 <- runif(d - 1, -1, 1) # if beta1 is fixed to 0 beta2 and beta 3 are close to 0
# beta1 <- 0
#
# fitGumbelU_standard <- optim(
# par = c(alpha0, b0),
# fn = LLKGumbelMGPDstand_U,
# x = excess_stand_matrix,
# beta1 = beta1,
# lw = c(1.1,-2),
# up = c(100, 2),
# control = list(maxit = 1e7, reltol = 1e-15)
# )
#
# return(
# c(
# fitGumbelU_standard$par[1:d],
# fitGumbelU_standard$par[-(1:d)],
# fitGumbelU_standard$value
# )
# )
# }
#
#
# x_gumbel <- foreach(i = 1:nb.run) %dopar% fit.Gumbel_fun(i)
# res.Gumbel <-
# matrix(unlist(x_gumbel),
# nrow = nb.run,
# ncol = 2 * d,
# byrow = T)
#
# res.Gumbel <- as.data.frame(res.Gumbel)
# colnames(res.Gumbel) <-
# c("alpha1", "alpha2", "alpha3", "beta2", "beta3", "LLK")
# bestGumbel <- which.min(res.Gumbel$LLK)
#
# write.csv(res.Gumbel, file = 'res.GumbelFitWeek3.csv')
res.Gumbel <- read.csv(file = "res.GumbelFitWeek3.csv", header = T, sep =",")
res.Gumbel <- res.Gumbel[,-1]
bestGumbel <- which.min(res.Gumbel$LLK)
AIC_Gumbel <-
2 * (dim(res.Gumbel)[2]-1) + 2 * res.Gumbel[bestGumbel, d + 3]
BIC_Gumbel <-
log(dim(excess_stand_matrix)[1]) * (dim(res.Gumbel)[2]-1) + 2 *
res.Gumbel[bestGumbel, d + 3]
Note that the reserve Gumbel model can be very instable but AIC and BIC are always much larger than the other two models
# nb.run <- 50
# fit.RevGumbel_fun <- function(i) {
# library('pracma')
# alpha0 <- runif(d, 1.2, 2)
# b0 <- rep(0.1, 2)
# beta1 <- 0
#
# fitRevGumbelU_standard <-
# optim(
# par = c(alpha0, b0),
# fn = LLKRevGumbelMGPDstand_U,
# x = excess_stand_matrix,
# beta1 = beta1,
# lw = c(1e-12,-1),
# up = c(100, 1),
# control = list(maxit = 1e7, reltol = 1e-15)
# )
#
# return(
# c(
# fitRevGumbelU_standard$par[1:d],
# fitRevGumbelU_standard$par[-(1:d)],
# fitRevGumbelU_standard$value
# )
# )
# }
# x_revgumbel <- foreach(i = 1:nb.run) %dopar% fit.RevGumbel_fun(i)
# res.RevGumbel <-
# matrix(
# unlist(x_revgumbel),
# nrow = nb.run,
# ncol = 2 * d,
# byrow = T
# )
#
# res.RevGumbel <- as.data.frame(res.RevGumbel)
# colnames(res.RevGumbel) <-
# c("alpha1", "alpha2", "alpha3", "beta2", "beta3", "LLK")
#
# write.csv(res.RevGumbel, file = 'res.RevGumbelFitWeek3.csv')
res.RevGumbel <- read.csv(file = "res.RevGumbelFitWeek3.csv", header = T, sep =",")
res.RevGumbel <- res.RevGumbel[,-1]
bestRevGumbel <- which.min(res.RevGumbel$LLK)
AIC_RevGumbel <-
2 *(dim(res.RevGumbel)[2]-1) + 2 * res.RevGumbel[bestRevGumbel, d +
3]
BIC_RevGumbel <-
log(dim(excess_stand_matrix)[1]) *(dim(res.RevGumbel)[2]-1) + 2 *
res.RevGumbel[bestRevGumbel, d + 3]
# nb.run <- 50
# fit.RevExpo_fun <- function(i) {
# library('pracma')
# alpha0 <- runif(d, 0.2, 5)
# b0 <- runif(d - 1, -2.5, 2.5)
# beta1 <- 0
#
# fitRevExpoU_standard <- optim(
# par = c(alpha0, b0),
# fn = LLKRevExpoMGPDstand_U,
# x = excess_stand_matrix,
# beta1 = beta1,
# lw = c(0.1,-3),
# up = c(100, 3),
# control = list(maxit = 1e7, reltol = 1e-15)
# )
# return(
# c(
# fitRevExpoU_standard$par[1:d],
# fitRevExpoU_standard$par[-(1:d)],
# fitRevExpoU_standard$value
# )
# )
# }
#
# x_revexpo <- foreach(i = 1:nb.run) %dopar% fit.RevExpo_fun(i)
# res.RevExpo <-
# matrix(unlist(x_revexpo),
# nrow = nb.run,
# ncol = 2 * d,
# byrow = T)
#
# res.RevExpo <- as.data.frame(res.RevExpo)
# colnames(res.RevExpo) <-
# c("alpha1", "alpha2", "alpha3", "beta2", "beta3", "LLK")
#
# write.csv(res.RevExpo, file = 'res.RevExpoFitWeek3.csv')
res.RevExpo <- read.csv(file = "res.RevExpoFitWeek3.csv", header = T, sep =",")
res.RevExpo <- res.RevExpo[,-1]
bestRevExpo <- which.min(res.RevExpo$LLK)
AIC_RevExpo <-
2 * (dim(res.RevExpo)[2]-1) + 2 * res.RevExpo[bestRevExpo, d + 3]
BIC_RevExpo <-
log(dim(excess_stand_matrix)[1]) * (dim(res.RevExpo)[2]-1) + 2 *
res.RevExpo[bestRevExpo, d + 3]
tab3 <- data.frame(matrix(c(AIC_Gumbel,BIC_Gumbel, AIC_RevGumbel, BIC_RevGumbel,AIC_RevExpo, BIC_RevExpo), ncol = 3))
colnames(tab3) <- c('Gumbel', 'RevGumbel', 'RevExpo')
rownames(tab3) <- c('AIC', 'BIC')
kable(tab3)
| Gumbel | RevGumbel | RevExpo | |
|---|---|---|---|
| AIC | 194.4243 | 2188.914 | 207.7058 |
| BIC | 201.7530 | 2196.243 | 215.0345 |
Selected model = Gumbel model
est.alpha_M1 <- as.numeric(res.Gumbel[bestGumbel, 1:d])
est.beta_M1 <- c(0, as.numeric(res.Gumbel[bestGumbel, (d + 1):(d + 2)]))
LLKGumbel <- as.numeric(res.Gumbel[bestGumbel, d+3])
alpha0 <- est.alpha_M1
beta <- rep(0, d)
fitGumbelU_standard_alpha <-
optim(
par = alpha0,
fn = LLKGumbelMGPDstand_U_alpha,
x = excess_stand_matrix,
beta = beta,
lw = c(1.1),
up = c(Inf),
control = list(maxit = 1e4, reltol = 1e-10)
)
AIC_M2 <-
2 * 5 + 2 * fitGumbelU_standard_alpha$value
BIC_M2 <-
log(dim(excess_stand_matrix)[1]) * 5 +
2 * fitGumbelU_standard_alpha$value
T <-
-2 * (LLKGumbel - fitGumbelU_standard_alpha$value)
LR_M2 <-
pchisq(q = T,
df = (5 - length(fitGumbelU_standard_alpha$par)),
lower.tail = F)
a0 <- mean(est.alpha_M1)
b0 <- est.beta_M1[2:3]
beta1 <- 0
fitGumbelU_standard_abeta <-
optim(
par = c(a0, b0),
fn = LLKGumbelMGPDstand_U_abeta,
x = excess_stand_matrix,
beta1 = beta1,
lw = c(1.1,-4),
up = c(Inf, 4),
control = list(maxit = 1e4, reltol = 1e-10)
)
AIC_M3 <-
2 * 5 + 2 * fitGumbelU_standard_abeta$value
BIC_M3 <-
log(dim(excess_stand_matrix)[1]) * 5 +
2 * fitGumbelU_standard_abeta$value
T <-
-2 * (LLKGumbel - fitGumbelU_standard_abeta$value)
LR_M3 <-
pchisq(
q = T,
df = 5 - length(fitGumbelU_standard_abeta$par),
lower.tail = F
)
a0 <- mean(est.alpha_M1)
beta <- rep(0, d)
fitGumbelU_standard_a <- optimize(
f = LLKGumbelMGPDstand_U_a,
x = excess_stand_matrix,
beta = beta,
lw = c(1.1),
up = c(Inf),
interval = c(1.1, 10)
)
AIC_M4 <- 2 + 2 * fitGumbelU_standard_a$objective
BIC_M4 <-
log(dim(excess_stand_matrix)[1]) + 2 * fitGumbelU_standard_a$objective
T <- -2 * (LLKGumbel - fitGumbelU_standard_a$objective)
LR_M4 <-
pchisq(
q = T,
df = 5 - length(fitGumbelU_standard_a$par),
lower.tail = F
)
LR_M4
## [1] 1.915197e-12
tab4 <- data.frame(matrix(c(AIC_Gumbel,AIC_M2,AIC_M3,AIC_M4,BIC_Gumbel, BIC_M2, BIC_M3,BIC_M4,NA,LR_M2, LR_M3, LR_M4), ncol = 3))
colnames(tab4) <- c('AIC', 'BIC', 'LR')
rownames(tab4) <- c('M1', 'M2', 'M3','M4')
kable(tab4, digits = 20)
| AIC | BIC | LR | |
|---|---|---|---|
| M1 | 194.4243 | 201.7530 | NA |
| M2 | 257.3854 | 264.7141 | 2.129006e-14 |
| M3 | 220.1578 | 227.4865 | 2.582474e-06 |
| M4 | 250.3020 | 251.7677 | 1.915197e-12 |
Selected model : M1
est.matrix_M1 <- rbind(est.alpha_M1, est.beta_M1)
write.csv(est.matrix_M1, file = "EstimatesParametersWeek3.csv")
est.M1 <- data.frame(est.matrix_M1)
rownames(est.M1) <- c("alpha", "beta")
colnames(est.M1) <- c("1", "2", "3")
kable(est.M1)
| 1 | 2 | 3 | |
|---|---|---|---|
| alpha | 2.220851 | 10.3661425 | 3.2111572 |
| beta | 0.000000 | 0.8367395 | 0.5957117 |
den <- 0
num <- 0
for (i in 1985:2018) {
temp <- epid_data[epid_data$season == i, ]
den <-
den + (temp[temp$weeks == 1,]$t_inc < thres_gpd[1]) * (temp[temp$weeks == 2,]$t_inc < thres_gpd[2])
num <-
num + (temp[temp$weeks == 1,]$t_inc < thres_gpd[1]) * (temp[temp$weeks == 2,]$t_inc < thres_gpd[2]) * (temp[temp$weeks == 3,]$t_inc > thres_gpd[3])
}
proba_extreme <- num / den
write.csv(proba_extreme, file = "ProbaExtremeWeek3.csv")
incidences2019 <- subset(x = incidences, subset = (season == 2019))
epid_data2019 <-
EpidemicDataFrame(data = incidences2019, d = 3, thres = thres_epid)
epid_data2019$excess <- rep(NA, length(epid_data2019$season))
epid_data2019$excess_stand <- rep(NA, length(epid_data2019$season))
for (j in unique(epid_data2019$season)) {
temp <- epid_data2019[epid_data2019$season == j, ]
if (sum(temp$t_inc > thres_gpd, na.rm = T) == 0) {
epid_data2019[epid_data2019$season == j, ]$excess <- rep(NA, d)
}
else {
epid_data2019[epid_data2019$season == j, ]$excess <-
temp$t_inc- thres_gpd
}
}
for (i in 1:d) {
epid_data2019[epid_data2019$weeks == i, ]$excess_stand <-
epid_data2019[epid_data2019$weeks == i, ]$excess / sigma_univ[i]
}
We will estimate the probabilty that the Week 3 ILI incidence rates will exceed the following thresholds defined as multiple of the observed maximum of ILI rates of the third weeks of all epidemics (which is equal to 1729).
thres <- max(epid_data[epid_data$weeks == 3, ]$t_inc)
thres_stand <- (thres-thres_gpd[3])/sigma_univ[3]
mult_pred <- c(0.5, 0.75,0.95,1,1.2)
thres_pred <- mult_pred*thres_stand
write.csv(thres_pred, file="FixedPredThresWeek3.csv")
thres_max <- data.frame(t(mult_pred*thres))
colnames(thres_max) <- c("0.5", "0.75", "0.95", "1", "1.2")
rownames(thres_max) <- c( "thresholds")
kable(thres_max)
| 0.5 | 0.75 | 0.95 | 1 | 1.2 | |
|---|---|---|---|---|---|
| thresholds | 864.5 | 1296.75 | 1642.55 | 1729 | 2074.8 |
proba.pred <- matrix(NA, nrow = 1, ncol = length(thres_pred))
proba.predict_GPD <- matrix(NA, nrow = 1, ncol = length(thres_pred))
proba.predict_LOGIT <-
matrix(NA, nrow = 1, ncol = length(thres_pred))
topredict_2019 <-
data.frame(week1 = epid_data2019[epid_data2019$weeks == 1,]$excess_stand,
week2 = epid_data2019[epid_data2019$weeks == 2,]$excess_stand)
x12 <-
2 * c(epid_data2019[epid_data2019$weeks == 1, ]$excess_stand, epid_data2019[epid_data2019$weeks == 2, ]$excess_stand)
for (i in 1:length(thres_pred)) {
proba.predict_GPD[, i] <-
excessprobGumbelU_3g12(
v3 = thres_pred[i],
x12 = x12,
alpha = est.alpha_M1,
beta = est.beta_M1
) * proba_extreme_3g12(x12 = x12)
}
reg_2019 <-
matrix(data = c(epid_data_ssNA[epid_data_ssNA$weeks == 1, ]$excess_stand,
epid_data_ssNA[epid_data_ssNA$weeks == 2, ]$excess_stand),ncol = 2)
for (i in 1:2){
response <- as.numeric(epid_data_ssNA[epid_data_ssNA$weeks == 3, ]$excess_stand > thres_pred[i])
fit <- glmnet(x = reg_2019, y = response, family = "binomial", alpha = 0, lambda = 1)
proba.predict_LOGIT[, i] <- predict(object = fit,
newx = matrix(x12, ncol =2),
type = "response")
}
proba.pred <- data.frame(matrix(c(thres*mult_pred,
proba.predict_GPD,
proba.predict_LOGIT), ncol = 5, byrow = T))
colnames(proba.pred) <- c("0.5", "0.75", "0.95", "1", "1.2")
rownames(proba.pred) <- c("Threshold", "GP", "Logistic")
kable(proba.pred)
| 0.5 | 0.75 | 0.95 | 1 | 1.2 | |
|---|---|---|---|---|---|
| Threshold | 864.5000000 | 1296.7500000 | 1.64255e+03 | 1.729e+03 | 2.0748e+03 |
| GP | 0.1851058 | 0.0119246 | 1.22860e-03 | 6.952e-04 | 7.1200e-05 |
| Logistic | 0.1743116 | 0.1041239 | NA | NA | NA |
epid_data_8519 <- rbind(epid_data, epid_data2019)
nb.epid <- length(unique(epid_data_8519$season))
level_gpd <- c(0.9, 0.9, 0.9)
thres_gpd <- rep(NA, 3)
for (i in 1:3) {
thres_gpd[i] <-
quantile(incidences$t_inc, probs = level_gpd[i], na.rm = T)
}
train_list <- list()
test_list <- list()
for (i in 1:nb.epid) {
season_i <- unique(epid_data_8519$season)[i]
index.remove <- which(epid_data_8519$season == season_i)
epid_data_test <-
subset(x = epid_data_8519, subset = season == season_i)
epid_data_LOO <- epid_data_8519[-index.remove,]
#Marginal Expo Fit
sigma_univ <- rep(0, d)
df.param_univ <- rep(NA, d)
for (k in 1:d) {
week <-
epid_data_LOO[(epid_data_LOO$weeks == k) &
!is.na(epid_data_LOO$t_inc), ]$t_inc
univ.fit <-
fevd(
x = week,
threshold = thres_gpd[k],
type = "Exponential",
method = "MLE"
)
sigma_univ[k] <- univ.fit$results$par[1]
}
#Create excesses
`%nin%` = Negate(`%in%`)
epid_data_LOO$excess <- rep(NA, length(epid_data_LOO$season))
epid_data_LOO$excess_stand <-
rep(NA, length(epid_data_LOO$season))
for (j in epid_data_LOO$season) {
temp <- epid_data_LOO[epid_data_LOO$season == j,]
if (sum(temp$t_inc > thres_gpd[1:d], na.rm = T) == 0) {
epid_data_LOO[epid_data_LOO$season == j,]$excess <- rep(NA, d)
}
else {
epid_data_LOO[epid_data_LOO$season == j,]$excess <-
temp$t_inc - thres_gpd[1:d]
}
}
for (k in 1:d) {
epid_data_LOO[epid_data_LOO$weeks == k,]$excess_stand <-
epid_data_LOO[epid_data_LOO$weeks == k,]$excess / sigma_univ[k]
}
season_NA_LOO <-
unique(subset(x = epid_data_LOO, is.na(epid_data_LOO$excess))$season)
train_list[[i]] <-
subset(x = epid_data_LOO, season %nin% season_NA_LOO)
epid_data_test$excess <- rep(0, 3)
epid_data_test$excess_stand <- rep(0, 3)
for (k in 1:d) {
epid_data_test[epid_data_test$weeks == k,]$excess <-
epid_data_test[epid_data_test$weeks == k,]$t_inc - thres_gpd[k]
epid_data_test[epid_data_test$weeks == k,]$excess_stand <-
epid_data_test[epid_data_test$weeks == k,]$excess / sigma_univ[k]
}
test_list[[i]] <- epid_data_test
}
LOO_fit_parallel <- foreach(
i = 1:nb.epid,
.packages = c("extRemes", "pracma"),
.combine = rbind
) %dopar% {
LOO_fit(i,train_list = train_list)
}
res.fitLOO_week3 <- data.frame(LOO_fit_parallel)
colnames(res.fitLOO_week3) <- c("Indice", "alpha1", "alpha2","alpha3", "beta1", "beta2", "beta3")
est.alpha_list <- list()
for (i in 1:nb.epid) {
est.alpha_list [[i]] <-
c(res.fitLOO_week3[i,]$alpha1,
res.fitLOO_week3[i,]$alpha2,
res.fitLOO_week3[i,]$alpha3)
}
est.beta_list <- list()
for (i in 1:nb.epid) {
est.beta_list [[i]] <-
c(res.fitLOO_week3[i,]$beta1,
res.fitLOO_week3[i,]$beta2,
res.fitLOO_week3[i,]$beta3)
}
res_parallel_GPD <- foreach(
i = 1:nb.epid,
.packages = c("extRemes", "pracma"),
.combine = rbind
) %dopar% {
pred_parallel_GPD(i, train_list = train_list, test_list = test_list, thres_pred = thres_pred)
}
res_week3_LOO_GPD <- data.frame(res_parallel_GPD)
colnames(res_week3_LOO_GPD) <-
c(
"Week3",
"Predict_Gumbel_05",
"Predict_Gumbel_075",
"Predict_Gumbel_095",
"Predict_Gumbel_1",
"Predict_Gumbel_12",
"LLK"
)
res_week3_LOO_GPD$season <- 1985:2019
res_parallel_LOGIT<- foreach(
i = 1:nb.epid,
.packages = c("glmnet"),
.combine = rbind
) %dopar% {
pred_parallel_LOGIT(i, train_list = train_list, test_list = test_list, thres_pred = thres_pred)
}
res_week3_LOO_LOGIT <- data.frame(res_parallel_LOGIT)
colnames(res_week3_LOO_LOGIT) <-
c(
"Predict_Logit_05",
"Predict_Logit_075",
"Predict_Logit_095",
"Predict_Logit_1",
"Predict_Logit_12"
)
res_week3_LOO <- cbind(res_week3_LOO_GPD, res_week3_LOO_LOGIT)
res_week3_05 <-
subset(res_week3_LOO,
select = c(Week3, Predict_Gumbel_05, Predict_Logit_05))
res_week3_05$Outcome <-
as.numeric(res_week3_05$Week3 > thres_pred[1])
x.plot <-
ggplot(data = res_week3_05, aes(x = as.factor(Outcome), y = Predict_Gumbel_05))
x.plot <-
x.plot + geom_point(shape = 1, size = 3)
x.plot <- x.plot + ggtitle("GP prediction")
x.plot <-
x.plot + xlab("Outcome") + ylab("Prediction probability")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + ylim(0,1)
y.plot <-
ggplot(data = res_week3_05, aes(x = as.factor(Outcome), y = Predict_Logit_05))
y.plot <-
y.plot + geom_point(shape = 1, size = 3)
y.plot <-
y.plot + xlab("Outcome") + ylab("Prediction probability")
y.plot <- y.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
y.plot <- y.plot + ggtitle("Logistic regression")
y.plot <- y.plot + ylim(0,1)
z.plot <- grid.arrange(x.plot, y.plot, ncol = 2)
res_week3_075 <-
subset(res_week3_LOO,
select = c(Week3, Predict_Gumbel_075, Predict_Logit_075))
res_week3_075$Outcome <-
as.numeric(res_week3_075$Week3 > thres_pred[2])
x.plot <-
ggplot(data = res_week3_075, aes(x = as.factor(Outcome), y = Predict_Gumbel_075))
x.plot <-
x.plot + geom_point(shape = 1, size = 3)
x.plot <- x.plot + ggtitle("GP prediction")
x.plot <-
x.plot + xlab("Outcome") + ylab("Prediction probability")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + ylim(0,1)
y.plot <-
ggplot(data = res_week3_075, aes(x = as.factor(Outcome), y = Predict_Logit_075))
y.plot <-
y.plot + geom_point(shape = 1, size = 3)
y.plot <- y.plot + ggtitle("Logistic regression")
y.plot <-
y.plot + xlab("Outcome") + ylab("Prediction probability")
y.plot <- y.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
y.plot <- y.plot + ylim(0,1)
z.plot <- grid.arrange(x.plot, y.plot, ncol = 2)
res_week3_095 <-
subset(res_week3_LOO,
select = c(Week3, Predict_Gumbel_095))
res_week3_095$Outcome <-
as.numeric(res_week3_095$Week3 > thres_pred[3])
x.plot <-
ggplot(data = res_week3_095, aes(x = as.factor(Outcome), y = Predict_Gumbel_095))
x.plot <-
x.plot + geom_point(shape = 1, size = 3)
x.plot <- x.plot + ggtitle("GP prediction")
x.plot <-
x.plot + xlab("Outcome") + ylab("Prediction probability")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + ylim(0,1)
x.plot
res_week3_1 <-
subset(res_week3_LOO, select = c(Week3, Predict_Gumbel_1))
res_week3_1$Outcome <- as.numeric(res_week3_1$Week3 > thres_pred[4])
x.plot <-
ggplot(data = res_week3_1, aes(x = as.factor(Outcome), y = Predict_Gumbel_1))
x.plot <-
x.plot + geom_point(shape = 1, size = 3)
x.plot <- x.plot + ggtitle("GP prediction")
x.plot <-
x.plot + xlab("Outcome") + ylab("Prediction probability")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + ylim(0,1)
res_week3_12 <-
subset(res_week3_LOO, select = c(Week3, Predict_Gumbel_12))
res_week3_12$Outcome <-
as.numeric(res_week3_12$Week3 > thres_pred[5])
x.plot <-
ggplot(data = res_week3_12, aes(x = as.factor(Outcome), y = Predict_Gumbel_12))
x.plot <-
x.plot + geom_point(shape = 1, size = 3)
x.plot <- x.plot + ggtitle("GP prediction")
x.plot <-
x.plot + xlab("Outcome") + ylab("Prediction probability")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
x.plot <- x.plot + ylim(0,1)
x.plot
## 0.5
p_05 <- mean(res_week3_05$Outcome)
brier_gumbel_05 <- 1 - mean((res_week3_05$Predict_Gumbel_05 - res_week3_05$Outcome) ^ 2) /
(p_05 * (1 - p_05))
brier_logit_05 <- 1 - mean((res_week3_05$Predict_Logit_05 - res_week3_05$Outcome) ^ 2,
na.rm = T) /(p_05 * (1 - p_05))
## 0.75
p_075 <- mean(res_week3_075$Outcome)
brier_gumbel_075 <-
1 - mean((res_week3_075$Predict_Gumbel_075 - res_week3_075$Outcome) ^ 2) /
(p_075 * (1 - p_075))
brier_logit_075 <-
1 - mean((res_week3_075$Predict_Logit_075 - res_week3_075$Outcome) ^ 2,
na.rm = T) /
(p_075 * (1 - p_075))
##0.95
p_095 <- mean(res_week3_095$Outcome)
brier_gumbel_095 <-
1 - mean((res_week3_095$Predict_Gumbel_095 - res_week3_095$Outcome) ^ 2) /
(p_095 * (1 - p_095))
brier_logit_095 <-
1 - mean((res_week3_095$Predict_Logit_095 - res_week3_095$Outcome) ^ 2,
na.rm = T) /
(p_095 * (1 - p_095))
##1
p_1 <- mean(res_week3_1$Outcome)
brier_gumbel_1 <-
1 - mean((res_week3_1$Predict_Gumbel_1 - res_week3_1$Outcome) ^ 2) /
(p_1 * (1 - p_1))
##1.2
p_12 <- mean(res_week3_12$Outcome)
brier_gumbel_12 <-
1 - mean((res_week3_12$Predict_Gumbel_12 - res_week3_12$Outcome) ^ 2) /
(p_12 * (1 - p_12))
brier_score <- data.frame(matrix(c( brier_gumbel_05, brier_gumbel_075, brier_gumbel_095, brier_gumbel_1, brier_gumbel_12, brier_logit_05, brier_logit_075, brier_logit_095, NA, NA), byrow = T, ncol = 5))
rownames(brier_score) <- c( "GP", "Logistic")
colnames(brier_score) <- c(0.5, 0.75, 0.95, 1, 1.2)
kable(brier_score)
| 0.5 | 0.75 | 0.95 | 1 | 1.2 | |
|---|---|---|---|---|---|
| GP | 0.3273776 | 0.6862972 | -0.4237232 | -0.8492024 | -Inf |
| Logistic | 0.0569408 | 0.0247828 | NaN | NA | NA |
dis.thres_LOGIT <- sort(res_week3_05$Predict_Logit_05, decreasing = TRUE)
pr.data_LOGIT <-
data.frame(DisThres = dis.thres_LOGIT, Model = rep("Logistic", length(dis.thres_LOGIT)))
for (i in 1:length(dis.thres_LOGIT)) {
pr.data_LOGIT$Precision[i] <- sum((res_week3_05$Outcome == 1) &
(res_week3_05$Predict_Logit_05 > dis.thres_LOGIT[i]) , na.rm = T
) / sum((res_week3_05$Predict_Logit_05 > dis.thres_LOGIT[i]), na.rm = T)
pr.data_LOGIT$Recall[i] <- sum((res_week3_05$Outcome == 1) &
(res_week3_05$Predict_Logit_05 > dis.thres_LOGIT[i]), na.rm = T
) / sum( (res_week3_05$Outcome == 1))
}
dis.thres_MGPD <- sort(res_week3_05$Predict_Gumbel_05, decreasing = TRUE)
pr.data_MGPD <-
data.frame(DisThres = c(dis.thres_MGPD),
Model = rep("Gumbel", length(dis.thres_MGPD)))
for (i in 1:length(dis.thres_MGPD)) {
pr.data_MGPD$Precision[i] <- sum((res_week3_05$Outcome == 1) &
(res_week3_05$Predict_Gumbel_05 > dis.thres_MGPD[i]) , na.rm = T
) / sum((res_week3_05$Predict_Gumbel_05 > dis.thres_MGPD[i]) )
pr.data_MGPD$Recall[i] <- sum((res_week3_05$Outcome == 1) &
(res_week3_05$Predict_Gumbel_05 > dis.thres_MGPD[i]), na.rm = T
) / sum( (res_week3_05$Outcome == 1))
}
### Precision and recall ###
pr.data <- rbind(pr.data_MGPD, pr.data_LOGIT)
y.plot <-
ggplot(data = pr.data, aes(
x = Recall,
y = Precision,
colour = Model,
linetype = Model
)) + geom_path()
y.plot <- y.plot + geom_hline(yintercept = sum( (res_week3_05$Outcome == 1))/length(res_week3_05$Outcome))
y.plot <- y.plot + xlim(c(0, 1)) + ylim(c(0, 1))
y.plot <- y.plot + scale_color_manual(values = c("blue", "red"))
y.plot <- y.plot + xlab("Recall") + ylab("Precision")
y.plot <- y.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
y.plot
## AUC PR ###
auc_pr_GP_05 <- PRAUC(res_week3_05$Predict_Gumbel_05, res_week3_05$Outcome)
auc_pr_Logit_05 <- PRAUC(res_week3_05[!is.na(res_week3_05$Predict_Logit_05),]$Predict_Logit_05,res_week3_05[!is.na(res_week3_05$Predict_Logit_05),]$Outcome)
## Average precision score ###
N <- length(pr.data_MGPD$Recall)
AP_GP_05 <- sum(pr.data_MGPD$Precision*(pr.data_MGPD$Recall - c(0,pr.data_MGPD$Recall[-N])), na.rm = T)
AP_LOGIT_05 <- sum(pr.data_LOGIT$Precision*(pr.data_LOGIT$Recall - c(0,pr.data_LOGIT$Recall[-N])), na.rm = T)
dis.thres_LOGIT <- sort(res_week3_075$Predict_Logit_075, decreasing = TRUE)
pr.data_LOGIT <-
data.frame(DisThres = dis.thres_LOGIT, Model = rep("Logistic", length(dis.thres_LOGIT)))
for (i in 1:length(dis.thres_LOGIT)) {
pr.data_LOGIT$Precision[i] <- sum((res_week3_075$Outcome == 1) &
(res_week3_075$Predict_Logit_075 > dis.thres_LOGIT[i]) , na.rm = T
) / sum((res_week3_075$Predict_Logit_075 > dis.thres_LOGIT[i]), na.rm = T)
pr.data_LOGIT$Recall[i] <- sum((res_week3_075$Outcome == 1) &
(res_week3_075$Predict_Logit_075 > dis.thres_LOGIT[i]), na.rm = T
) / sum( (res_week3_075$Outcome == 1))
}
dis.thres_MGPD <- sort(res_week3_075$Predict_Gumbel_075, decreasing = TRUE)
pr.data_MGPD <-
data.frame(DisThres = c(dis.thres_MGPD),
Model = rep("Gumbel", length(dis.thres_MGPD)))
for (i in 1:length(dis.thres_MGPD)) {
pr.data_MGPD$Precision[i] <- sum((res_week3_075$Outcome == 1) &
(res_week3_075$Predict_Gumbel_075 > dis.thres_MGPD[i]) , na.rm = T
) / sum((res_week3_075$Predict_Gumbel_075 > dis.thres_MGPD[i]) )
pr.data_MGPD$Recall[i] <- sum((res_week3_075$Outcome == 1) &
(res_week3_075$Predict_Gumbel_075 > dis.thres_MGPD[i]), na.rm = T
) / sum( (res_week3_075$Outcome == 1))
}
### Precision and recall ###
pr.data <- rbind(pr.data_MGPD, pr.data_LOGIT)
y.plot <-
ggplot(data = pr.data, aes(
x = Recall,
y = Precision,
colour = Model,
linetype = Model
)) + geom_path()
y.plot <- y.plot + geom_hline(yintercept = sum( (res_week3_075$Outcome == 1))/length(res_week3_075$Outcome))
y.plot <- y.plot + xlim(c(0, 1)) + ylim(c(0, 1))
y.plot <- y.plot + scale_color_manual(values = c("blue", "red"))
y.plot <- y.plot + xlab("Recall") + ylab("Precision")
y.plot <- y.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
y.plot
## AUC PR ###
auc_pr_GP_075 <- PRAUC(res_week3_075$Predict_Gumbel_075, res_week3_075$Outcome)
auc_pr_Logit_075 <- PRAUC(res_week3_075[!is.na(res_week3_075$Predict_Logit_075),]$Predict_Logit_075,res_week3_075[!is.na(res_week3_075$Predict_Logit_075),]$Outcome)
## Average precision score ###
N <- length(pr.data_MGPD$Recall)
AP_GP_075 <- sum(pr.data_MGPD$Precision*(pr.data_MGPD$Recall - c(0,pr.data_MGPD$Recall[-N])), na.rm = T)
AP_LOGIT_075 <- sum(pr.data_LOGIT$Precision*(pr.data_LOGIT$Recall - c(0,pr.data_LOGIT$Recall[-N])), na.rm = T)
dis.thres_MGPD <- sort(res_week3_095$Predict_Gumbel_095, decreasing = TRUE)
pr.data_MGPD <-
data.frame(DisThres = c(dis.thres_MGPD),
Model = rep("Gumbel", length(dis.thres_MGPD)))
for (i in 1:length(dis.thres_MGPD)) {
pr.data_MGPD$Precision[i] <- sum((res_week3_095$Outcome == 1) &
(res_week3_095$Predict_Gumbel_095 > dis.thres_MGPD[i]) , na.rm = T
) / sum((res_week3_095$Predict_Gumbel_095 > dis.thres_MGPD[i]) )
pr.data_MGPD$Recall[i] <- sum((res_week3_095$Outcome == 1) &
(res_week3_095$Predict_Gumbel_095 > dis.thres_MGPD[i]), na.rm = T
) / sum( (res_week3_095$Outcome == 1))
}
### Precision and recall ###
pr.data <- rbind(pr.data_MGPD)
y.plot <-
ggplot(data = pr.data, aes(
x = Recall,
y = Precision,
colour = Model,
linetype = Model
)) + geom_path()
y.plot <- y.plot + geom_hline(yintercept = sum( (res_week3_095$Outcome == 1))/length(res_week3_095$Outcome))
y.plot <- y.plot + xlim(c(0, 1)) + ylim(c(0, 1))
y.plot <- y.plot + scale_color_manual(values = c("blue"))
y.plot <- y.plot + xlab("Recall") + ylab("Precision")
y.plot <- y.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
y.plot
## AUC PR ###
auc_pr_GP_095 <- PRAUC(res_week3_095$Predict_Gumbel_095, res_week3_095$Outcome)
## Average precision score ###
N <- length(pr.data_MGPD$Recall)
AP_GP_095 <- sum(pr.data_MGPD$Precision*(pr.data_MGPD$Recall - c(0,pr.data_MGPD$Recall[-N])), na.rm = T)
dis.thres_MGPD <- sort(res_week3_1$Predict_Gumbel_1, decreasing = TRUE)
pr.data_MGPD <-
data.frame(DisThres = c(dis.thres_MGPD),
Model = rep("Gumbel", length(dis.thres_MGPD)))
for (i in 1:length(dis.thres_MGPD)) {
pr.data_MGPD$Precision[i] <- sum((res_week3_1$Outcome == 1) &
(res_week3_1$Predict_Gumbel_1 > dis.thres_MGPD[i]) , na.rm = T
) / sum((res_week3_1$Predict_Gumbel_1 > dis.thres_MGPD[i]) )
pr.data_MGPD$Recall[i] <- sum((res_week3_1$Outcome == 1) &
(res_week3_1$Predict_Gumbel_1 > dis.thres_MGPD[i]), na.rm = T
) / sum( (res_week3_1$Outcome == 1))
}
### Precision and recall ###
pr.data <- rbind(pr.data_MGPD)
y.plot <-
ggplot(data = pr.data, aes(
x = Recall,
y = Precision,
colour = Model,
linetype = Model
)) + geom_path()
y.plot <- y.plot + geom_hline(yintercept = sum( (res_week3_1$Outcome == 1))/length(res_week3_1$Outcome))
y.plot <- y.plot + xlim(c(0, 1)) + ylim(c(0, 1))
y.plot <- y.plot + scale_color_manual(values = c("blue"))
y.plot <- y.plot + xlab("Recall") + ylab("Precision")
y.plot <- y.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
plot.title = element_text(size = 18)
)
y.plot
## AUC PR ###
auc_pr_GP_1 <- PRAUC(res_week3_1$Predict_Gumbel_1, res_week3_1$Outcome)
## Average precision score ###
N <- length(pr.data_MGPD$Recall)
AP_GP_1 <- sum(pr.data_MGPD$Precision*(pr.data_MGPD$Recall - c(0,pr.data_MGPD$Recall[-N])), na.rm = T)
Cannot be computed because there is no exceedance of this threshold.
auc_prc <- data.frame(matrix(c(auc_pr_GP_05, auc_pr_GP_075, auc_pr_GP_095, auc_pr_GP_1, NA, auc_pr_Logit_05, auc_pr_Logit_075, NA, NA, NA), byrow = T, ncol = 5))
rownames(auc_prc) <- c( "GP", "Logistic")
colnames(auc_prc) <- c(0.5, 0.75, 0.95, 1, 1.2)
kable(auc_prc)
| 0.5 | 0.75 | 0.95 | 1 | 1.2 | |
|---|---|---|---|---|---|
| GP | 0.5537879 | 0.5694444 | 0.3333333 | 0.25 | NA |
| Logistic | 0.2428498 | 0.1811975 | NA | NA | NA |
aprec_score <- data.frame(matrix(c(AP_GP_05, AP_GP_075, AP_GP_095, AP_GP_1, NA, AP_LOGIT_05, AP_LOGIT_075, NA, NA, NA), byrow = T, ncol = 5))
rownames(aprec_score) <- c( "GP", "Logistic")
colnames(aprec_score) <- c(0.5, 0.75, 0.95, 1, 1.2)
kable(aprec_score)
| 0.5 | 0.75 | 0.95 | 1 | 1.2 | |
|---|---|---|---|---|---|
| GP | 0.7742424 | 0.9166667 | 0.5 | 0.5 | NA |
| Logistic | 0.4775092 | 0.2651727 | NA | NA | NA |
anomaly.df <- data.frame(season = unique(epid_data_8519$season))
anomaly.df$LLK <- -log(res_week3_LOO$LLK)
anomaly.df <- anomaly.df[-c(30,32),]
## Get simulations
simul.dataU_Gumbel <-
read.csv("SimulatedDataWeek3.csv",
row.names = 1
)
simulGumbel_W3 <- simul.dataU_Gumbel[simul.dataU_Gumbel$weeks == 3, ]
q99 <- quantile(simulGumbel_W3$excess, probs = 0.99)
index1 <- which(simulGumbel_W3$excess > q99-1e-5)
temp <- simulGumbel_W3[index1, ]
index2 <- which(temp[temp$weeks == 3,]$excess < q99+1e-5)
extreme.point.season <- temp[index2, ]$season
extreme.point.df <-
simul.dataU_Gumbel[simul.dataU_Gumbel$season == extreme.point.season,]
extreme.point.vec <- as.vector(extreme.point.df$excess)
LLK.extreme <-
DensityGumbelMGPDstand_U(x = extreme.point.vec, alpha = est.alpha_M1, beta = est.beta_M1)
extreme.point.df$LLK <- -log(LLK.extreme)
abnormal.point.vec <-
c(extreme.point.vec[1] * 1.5,
extreme.point.vec[2] * 0.5,
extreme.point.vec[3] * 1.5)
abnormal.point.df <-
data.frame(season = rep(1, 3),
weeks = 1:3,
excess = abnormal.point.vec)
LLK.abnormal <-
DensityGumbelMGPDstand_U(x = abnormal.point.vec, alpha = est.alpha_M1, beta = est.beta_M1)
abnormal.point.df$LLK <- -log(LLK.abnormal)
swine.point.df <- data.frame(
season = 2010,
LLK = anomaly.df[anomaly.df$season == 2010,]$LLK
)
res_week3_simus <-
read.csv(file = "PredSimus_Week3.csv", row.names = 1)
LLKsimus <- -log(res_week3_simus$LLK)
levels.10 <- quantile(LLKsimus, probs = 0.9)
levels.5 <- quantile(LLKsimus, probs = 0.95)
levels.1<- quantile(LLKsimus, probs = 0.99)
levels.01 <- quantile(LLKsimus, probs = 0.999)
tab4 <- data.frame(t(c(levels.10, levels.5,levels.1, levels.01)))
rownames(tab4) <- c("level")
colnames(tab4) <- c("10%", "5%", "1%", "0.1%")
kable(tab4)
| 10% | 5% | 1% | 0.1% | |
|---|---|---|---|---|
| level | 4.720036 | 5.603064 | 7.78854 | 14.50502 |
x.plot <-
ggplot(data = anomaly.df, aes(x = season, y = LLK))
x.plot <-
x.plot + geom_point(shape = 1)
x.plot <-
x.plot + geom_point(data = extreme.point.df, aes(2001, LLK), shape = 15, size = 2)#square
x.plot <-
x.plot + geom_point(data = abnormal.point.df, aes(2015, LLK), shape = 17, size = 2) #triangle
x.plot <-
x.plot + geom_point(data = swine.point.df, aes(2010, LLK), shape = 19, size = 2)#circle
x.plot <-
x.plot + geom_hline(yintercept = levels.1,
linetype = 2,
color = "blue")
x.plot <-
x.plot + geom_hline(yintercept = levels.01,
linetype = 3,
color = "blue")
x.plot <-
x.plot + xlab("Season") + ylab("Negative log-likelihood")
x.plot <- x.plot + theme(
panel.grid.minor = element_blank(),
text = element_text(size = 16),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
plot.title = element_text(size = 18)
)
x.plot